%macro mba(indset=,outlib=,analysis_unit=,basket_dimension=,level=3, threshold=100); /************************************************************* Macro name: %mba Purpose: For Market Basket Analysis Developer: John Xu, 1ST Consulting, 515-778-4093 © 2010 John Xu *************************************************************/ %macro wheres(L=); %let i=1; %do %while(%eval(&i<&L)); %do j=&i+1 %to &L; t&i..&basket_dimension.=t&j..&basket_dimension. and %end; %let i=%eval(&i+1); %end; %do i=2 %to &L-1; &analysis_unit._%eval(&i-1)<&analysis_unit._&i. and %end; &analysis_unit._%eval(&L-1)<&analysis_unit._&L %mend wheres; %macro permute(dset=,r=); proc sql noprint; select case when upcase(type)='CHAR' then '$' else ' ' end into :an_type from DICTIONARY.COLUMNS where libname='WORK' and upcase(memname)=upcase("&dset.") and upcase(name)=upcase("&analysis_unit._1"); quit; %put an_type= &an_type; data &dset(rename=(%do i=1 %to &r; an_&i=&analysis_unit._&i %end;)); set &dset; drop i j r1 - r&r copy &analysis_unit._1 - &analysis_unit._&r; array as(*) &analysis_unit._1 - &analysis_unit._&r; array ans(*) &an_type an_1 - an_&r; array check (*) r1 - r&r; %do m=1 %to &r; do r&m = 1 to &r; %end; copy=0; do i=2 to &r; do j=1 to i-1; if check(j)=check(i) then copy+1; end; end; if copy = 0 then do; %do m=1 %to &r; ans(r&m)=as(check(r&m)); %end; output; end; %do m=1 %to &r; end; %end; run; proc sort data=&dset; by %do i=1 %to &r; &analysis_unit._&i %end;; run; %mend permute; proc sort data=&indset.(keep=&analysis_unit. &basket_dimension.) out =mba_data nodupkey; by &analysis_unit. &basket_dimension.; run; proc sql noprint; /* Get BASKET_DIMENSION */ create table Basket_Dimension as select &basket_dimension. ,count(*) as count from mba_data group by &basket_dimension.; create table Basket_Dimension_Summary as select count as Basket_Dimension ,count(*) as Basket_Count from Basket_Dimension group by count; /** Get TOT_BASKET_DIMENSIONS **/ select sum(Basket_Count) into :TOT_BD from Basket_Dimension_Summary; /** Level 1 Result **/ create table &outlib..mba_level_1 as select &analysis_unit as &analysis_unit ,count(&basket_dimension) as count ,(calculated count)/&TOT_BD. as percent from mba_data group by &analysis_unit order by &analysis_unit; create table level_1t as select &analysis_unit as &analysis_unit._1 ,count as count_1 from &outlib..mba_level_1; %do L=2 %to &level; create table mba_data as select a.* from mba_data a, Basket_Dimension b where a.&basket_dimension = b.&basket_dimension and b.count >= &L order by &basket_dimension; /* Get Co-occurence */ create table level_&L.t as select %do i=1 %to &L; t&i..&analysis_unit. as &analysis_unit._&i, %end; count(*) as count_&L from mba_data t1 %do i=2 %to &L.;, mba_data t&i %end; where %wheres(L=&L) group by &analysis_unit._1 %do i=2 %to &L.; , &analysis_unit._&i %end; %if &threshold^=0 %then %do; having count(*)>=&threshold %end; ; quit; /* perform permutation */ %permute(dset=level_&L.t,r=&L); /* Get MBA result for the level */ proc sql noprint; create table &outlib..mba_level_&L as select %do i=1 %to %eval(&L-1); b.&analysis_unit._&i as ANALYSIS_UNIT_&i, %end; a.count_%eval(&L-1) as ANALYSIS_UNIT_FREQ ,b.&analysis_unit._&L as ASSOC_ANALYSIS_UNIT ,c.count_1 as ASSOC_ANALYSIS_UNIT_FREQ ,b.count_&L as CO_OCCUR_FREQ ,b.count_&L/a.count_%eval(&L-1) as CONFIDENCE ,c.count_1 /&TOT_BD as EXPECTED_CONFIDENCE ,b.count_&L/&TOT_BD as SUPPORT ,(calculated CONFIDENCE)/ (calculated EXPECTED_CONFIDENCE) as LIFT from level_%eval(&L-1)t a, level_&L.t b, level_1t c where %do i=1 %to %eval(&L-1); a.&analysis_unit._&i=b.&analysis_unit._&i and %end; b.&analysis_unit._&L=c.&analysis_unit._1; %end; quit; proc print data=Basket_Dimension_Summary; title 'Basket_Dimension_Summary'; run; %mend mba; /* Testing program */ options mprint; libname test 'c:\data'; option notes; data test.sample; do customerkey=1 to 100000; jmax=int(uniform(13433423)*9)+1; do j=1 to jmax; *product='P'|| put(int(uniform(53466)**3 *300) + 1,z4.0); product=int(uniform(53466)**3 *300) + 1; output; end; end; run; %mba(indset=test.sample,outlib=test,analysis_unit=product,basket_dimension=customerkey,level=6); proc print data=test.mba_level_1; title "MBA output"; run; proc print data=test.mba_level_2(obs=10); run; proc print data=test.mba_level_3(obs=10); run; proc print data=test.mba_level_4(obs=10); run; proc print data=test.mba_level_5(obs=10); run;